#!/usr/bin/env bash
# Copyright 2014 GSI, Inc. All rights reserved.
#
#
# Notes:
#
# 1. The script redefines $DDS_LOCATION to WN's working dir.
#
# Arguments:
# $1 - total number of scouts to run on this host, this one included. If greater than 1, the script deploys ($1 - 1) additional child scouts.
# $2 - overrides the number of task slots for this instance of the scout (passed to dds-agent as --slots)
#

# current working dir
# Resolve the directory that holds this script (symbolic links resolved) and
# make it the current directory. Everything below works relative to it.
WD=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd -P)
if [ -z "$WD" ] || ! cd -- "$WD"; then
   echo "Error: can't change to the directory of the script: $0" >&2
   exit 1
fi

# Session files. They all live in the working directory.
# Plain quoted assignments are used (no eval), so a directory name that
# contains spaces or shell metacharacters is taken literally.
LOCK_FILE="$WD/DDSWorker.lock"
PID_FILE="$WD/DDSWorker.pid"
DDS_AGENT_ID_FILE="$WD/dds-agent.client.id"
USER_SCRIPT="$WD/user_worker_env.sh"
# bin name:
# <package>-<version>-<OS>-<ARCH>.tar.gz
BASE_NAME="dds-wrk-bin"
BIN_REPO="http://dds.gsi.de/releases/add/"

#=============================================================================
# ***** LOG function *****
#=============================================================================
logMsg()
{
   # Prints one log line to stdout: "***", a timestamp in square brackets and
   # the message ($1), separated by tabs. Backslash escapes in the message
   # are interpreted (echo -e).
   #
   # The timestamp follows the RFC-2822 layout:
   #    www, dd mmm yyyy hh:mm:ss +zzzz
   # It is built from an explicit format string, because "date -R" is a GNU
   # specific implementation (doesn't work on Mac, for example).
   local stamp
   stamp=$(date '+%a, %d %b %Y %T %z')
   echo -e "***\t[${stamp}]\t$1"
}
# ************************************************************************
# ***** wait_and_kill *****
# ************************************************************************
wait_and_kill()
{
   # Waits for a process to terminate and kills it if it doesn't.
   #
   # Arguments:
   #   $1 - PID of the process to wait for
   #
   # The process is polled once per second with "kill -0". If it still exists
   # after 10 polls, a non-ignorable kill (SIGKILL) is sent.
   # Returns immediately if the PID is empty or the process is already gone.
   local -r pid="$1"
   local -r wait_time=10
   local elapsed=0

   [ -n "$pid" ] || return 0

   while kill -0 "$pid" &>/dev/null; do
      elapsed=$((elapsed + 1))
      if [ "$elapsed" -gt "$wait_time" ]; then
         logMsg "Process \"$pid\" doesn't want to stop. Forcing a non-ignorable kill..."
         kill -9 "$pid"
         break
      fi
      sleep 1
   done
}
#=============================================================================
# ***** delete_tmp_dir  *****
#=============================================================================
delete_tmp_dir()
{
   # Removes the directory named by $_TMP_DIR.
   # Returns 1 without deleting anything when $_TMP_DIR is a symbolic link.
   # Does nothing when $_TMP_DIR is empty or is not a directory.

   # never follow a symbolic link
   [ -L "$_TMP_DIR" ] && {
      logMsg "Security Error: tmp directory of the previous session is a symbolic link."
      return 1
   }

   # nothing to remove unless it is a real directory
   [ ! -d "$_TMP_DIR" ] || rm -rf "$_TMP_DIR"
}
#=============================================================================
# ***** clean_up *****
#=============================================================================
# ***** Perform program exit housekeeping *****
clean_up()
{
    # Performs the exit housekeeping and terminates the script.
    #
    # Arguments:
    #   $1 - exit code of the script. When omitted (this is how the signal
    #        trap calls the function) the script exits with 0.
    #
    # Globals read: DDSAGENT_PID, dds_agent, LOCK_FILE, PID_FILE,
    #               DDS_AGENT_ID_FILE
    logMsg "Starting the cleaning procedure..."

    # Try to stop the agent by first sending a TERM signal.
    # And if after 10 sec. it still exists send a non-ignorable kill.
    # The function is also called before the agent was ever started,
    # in which case there is no PID and nothing to stop.
    if [ -n "$DDSAGENT_PID" ]; then
        logMsg "Gracefully shut down DDS worker process(es): $DDSAGENT_PID"
        kill "$DDSAGENT_PID" &>/dev/null
        wait_and_kill "$DDSAGENT_PID"
    fi

    # The path of the agent binary is known only after the binaries have been
    # located. Without this guard an early clean-up would expand to a bare
    # "clean" and try to run a command of that name.
    if [ -n "$dds_agent" ]; then
        logMsg "Calling agent clean up..."
        "$dds_agent" clean
    fi

    # delete the content of the worker package
    # this is needed in case of condor RMS, for example
    # otherwise condor will transfer all files back
    # TODO: delete the rest of files as well
    # BUG: We must not delete pod-user-defaults and its dependencies,
    #      since many plug-ins use it in stage-out processes.
    #      Need to think how to delete it only in the condor plug-in case
    #   to_delete=$(tar -ztf pod-worker)
    #   if (( $? == 0 )) ; then
    #      rm -vf $to_delete
    #   fi

    # delete tmp directory
    delete_tmp_dir

    # clean up after yourself, and release your trap
    rm -f "$LOCK_FILE"
    rm -f "$PID_FILE"
    # remove dds-agent ID file
    rm -f "$DDS_AGENT_ID_FILE"

    logMsg "done cleaning up."
    exit "${1:-0}"
}
#=============================================================================
# ***** get_freeport *****
#=============================================================================
# ***** returns a free port from a given range  *****
get_freeport()
{
   # Prints the first TCP port of a range for which netstat reports neither a
   # LISTEN nor a TIME_WAIT socket.
   #
   # Arguments:
   #   $1 - first port of the range
   #   $2 - last port of the range (inclusive)
   # Globals read:
   #   OS - "Darwin" selects the BSD netstat address format (address.port),
   #        anything else the Linux one (address:port)
   # Outputs:
   #   the port number on stdout; if no port is free, an error message is
   #   written to stdout instead
   # Returns:
   #   0 if a port was found, 1 otherwise
   local port
   local needle

   for (( port = $1; port <= $2; ++port )); do
      if [ "$OS" = "Darwin" ]; then
         needle=".$port "
      else
         needle=":$port "
      fi

      # The address/port is matched as a fixed string (-F). As a regular
      # expression the leading dot would match any character, so a socket on
      # port 8080 would make port 80 look busy.
      if [ "$OS" = "Darwin" ]; then
         netstat -an -p tcp 2>/dev/null | grep -F -- "$needle" | grep -E -q -i "listen|time_wait" || { echo "$port"; return 0; }
      else
         netstat -ant 2>/dev/null | grep -F -- "$needle" | grep -E -q -i "listen|time_wait" || { echo "$port"; return 0; }
      fi
   done

   echo "Error: Cant find a free socket port in the given range: $1 - $2"
   return 1
}
#=============================================================================
# ***** check_arch *****
# so far we support only Linux (amd64 and x86) and MacOSX (tested on 10.6)
#=============================================================================
check_arch()
{
   # Detects the operating system and the CPU architecture of the host.
   #
   # Globals written:
   #   OS           - output of "uname -s"; only Linux and Darwin are accepted
   #   wn_host_arch - output of "uname -m"
   #   host_arch    - "x86", "amd64" or, on Darwin, "universal"
   #
   # Calls "clean_up 1" on an unsupported operating system or architecture.
   OS=$(uname -s 2>&1)
   case "$OS" in
      "Linux"|"Darwin")
         ;;
      *)
         logMsg "Error: DDS doesn't support this operating system. Exiting..."
         clean_up 1
         ;;
   esac

   wn_host_arch=$(uname -m  2>&1)
   case "$wn_host_arch" in
      i[3-9]86*|x86|x86pc|k5|k6|k6_2|k6_3|k6-2|k6-3|pentium*|athlon*|i586_i686|i586-i686)
         host_arch="x86"
         ;;
      x86_64)
         host_arch="amd64"
         ;;
      *)
         # report what uname returned; host_arch is not set on this path
         logMsg "Error: unsupported architecture: $wn_host_arch"
         clean_up 1
         ;;
   esac
   logMsg "host's CPU/instruction set: $host_arch"

   #MacOSX is using always universal bins
   if [ "$OS" == "Darwin" ]; then
      host_arch="universal"
      logMsg "using universal MacOSX bins"
   fi
}
#=============================================================================
# ***** untar_payload *****
#
#=============================================================================
payload_uuencode=1
payload_binary=0
# Extracts the archive that is appended to this script into the current
# directory. The archive starts on the line after the first line that consists
# of the payload marker only (the word PAYLOAD followed by a colon).
#
# Globals read:
#   payload_binary   - if not 0, the payload is a raw gzipped tar archive
#   payload_uuencode - if not 0, the payload is a uuencoded gzipped tar archive
# Returns:
#   0 on success, non-zero if the marker is missing or the extraction failed
untar_payload()
{
  local marker_line
  local payload_start
  local rc=0

  # only the first marker counts: everything after it is payload data
  marker_line=$(grep --text --line-number -m 1 '^PAYLOAD:$' "$0" | cut -d ':' -f 1)
  if [[ -z "$marker_line" ]]; then
    # without this check the whole script would be piped into tar/uudecode
    logMsg "Error: Can't find the payload marker in: $0"
    return 1
  fi

  payload_start=$((marker_line + 1))
  if [[ $payload_binary -ne 0 ]]; then
    tail -n +"$payload_start" "$0" | tar -xzvf - || rc=$?
  fi
  if [[ $payload_uuencode -ne 0 ]]; then
    tail -n +"$payload_start" "$0" | uudecode | tar -xzvf - || rc=$?
  fi
  return "$rc"
}
# ************************************************************************
#
#         M A I N
#
# ************************************************************************

# check for lock file
# With "noclobber" set the redirection fails if the lock file already exists,
# so testing for the lock and creating it is a single step.
if ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2> /dev/null; then
  logMsg "Starting DDS Scout's main block"
else
  logMsg "Error: There is already a DDS session running in the directory: $WD"
  exit 1
fi

# store the PID of this scout
echo "$$" > "$PID_FILE"

# handle signals
# The trap is installed only after the lock has been taken, so that clean_up
# never removes a lock file that belongs to another session.
trap clean_up SIGHUP SIGINT SIGTERM

# On some machines LC_ALL is not set which leads to an exception in BOOST.
export LC_ALL=C

# print the environment
env

logMsg "+++ DDS Worker START +++"
logMsg "Current working directory: $WD"

# **********************************************************************
# *** untar payload
# **********************************************************************
logMsg "Untar payload..."
untar_payload

# Define DDS commander location
# It's used by plug-ins working on the same machine as commander
# Define $DDS_COMMANDER_BIN_LOCATION only for parent WN scripts. Children re-use the parent's DDS_COMMANDER_BIN_LOCATION
# (both variables are exported, so a child scout started by this script finds them already set).
if [ -z "$DDS_COMMANDER_BIN_LOCATION" ]; then
   export DDS_COMMANDER_BIN_LOCATION="$DDS_LOCATION/bin"
   export DDS_COMMANDER_LIBS_LOCATION="$DDS_LOCATION/lib"
fi

echo "DEBUG=$DDS_LOCATION"

# Exporting DDS location
# From here on DDS_LOCATION points to the working directory of the WN.
export DDS_LOCATION="$WD"

# default WN locations
DDS_BINARIES_LOCATION="$DDS_LOCATION/"
export DDS_LIBS_LOCATION="$DDS_LOCATION/"

# getting the version of DDS
PKG_VERSION=$(cat "$WD/version")

# execute user's script if present
if [ -r "$USER_SCRIPT" ]; then
   logMsg "Sourcing a user defined environment script..."
   source "$USER_SCRIPT"
   logMsg "Current environment: "
   env
fi

# host's CPU/instruction set
check_arch

# A WN package without binaries
# Stays empty when the package ships its own binaries, and is set to "YES"
# when the binaries of the commander are used instead.
LIGHTWEIGHT_PKG=""

# use pre-compiled binaries from the worker package
# ***** prepare pre-compiled wn binaries *****
WN_BIN_ARC="$WD/$BASE_NAME-$PKG_VERSION-$OS-$host_arch.tar.gz"
if [ ! -f "$WN_BIN_ARC" ]; then
   # Checking for a lightweight package
   echo "DDS_COMMANDER_BIN_LOCATION=$DDS_COMMANDER_BIN_LOCATION"
   if [ -x "$DDS_COMMANDER_BIN_LOCATION/dds-agent" ]; then
      DDS_BINARIES_LOCATION=$DDS_COMMANDER_BIN_LOCATION
      DDS_LIBS_LOCATION=$DDS_COMMANDER_LIBS_LOCATION
      LIGHTWEIGHT_PKG="YES"
   else
      logMsg "Error: Can't find WN pre-compiled bin.: $WN_BIN_ARC"
      clean_up 1
   fi
fi

# un-tar without creating a sub-directory
if [ -z "$LIGHTWEIGHT_PKG" ]; then
   tar --strip-components=1 -xzf "$WN_BIN_ARC" || clean_up 1
fi

export PATH="$DDS_BINARIES_LOCATION:$PATH"
if [ "$OS" == "Linux" ]; then
   export LD_LIBRARY_PATH="$DDS_LIBS_LOCATION:$LD_LIBRARY_PATH"
fi
if [ "$OS" == "Darwin" ]; then
   export DYLD_LIBRARY_PATH="$DDS_LIBS_LOCATION:$DYLD_LIBRARY_PATH"
fi

# Transmitting an executable through the InputSandbox does not preserve execute permissions
chmod +x "$DDS_BINARIES_LOCATION/dds-agent"
chmod +x "$DDS_BINARIES_LOCATION/dds-user-defaults"

user_defaults="$DDS_BINARIES_LOCATION/dds-user-defaults"
dds_agent="$DDS_BINARIES_LOCATION/dds-agent"

# check binary
# If the agent can't even print its version, the binaries don't fit this host.
if ! "$dds_agent" --version; then
   logMsg "Error: Can't find a suitable pre-compiled binary for this system."
   clean_up 1
fi

#
# Timestamp of the submission of the DDS job to the RMS.
# The value below is only a placeholder: it is set automatically by
# dds-prep-worker when the job is submitted.
# NOTE(review): the substitution itself is not visible in this file; presumably
# it matches this line as text, so keep the line as it is.
#
export DDS_WN_SUBMIT_TIMESTAMP=0000

# deploy more agents on this host if needed
# $1 is the total number of scouts wanted on this host, this one included,
# so ($1 - 1) child scouts are started. Each child gets its own working
# directory next to the one of this scout and its own copy of this script.
if [ -n "$1" ]; then
  if [ "$1" -gt 1 ]; then
    logMsg "This scout is requested to deploy additionally $(($1 - 1)) agents on that machine..."
    # The script is copied from the working directory rather than from $0:
    # $0 may be a relative path, which is no longer valid after the cd to $WD.
    scout_name="$(basename "$0")"
    NUM_WNs=1
    while [ "$NUM_WNs" -lt "$1" ]
    do
      # create working directory for the child agent
      WD_child_name="$(basename "$WD")_$NUM_WNs"
      logMsg "Deploying child: $WD_child_name"
      WD_child="$(dirname "$WD")/$WD_child_name"

      # copy WN package into child's directory
      if mkdir -p "$WD_child" && cp "$WD/$scout_name" "$WD_child/"; then
        # execute DDS scout script of the child instance
        # It runs in the background, from within its own directory; its
        # output goes to scout.log in that directory.
        logMsg "Executing DDS scout of the child: $WD_child/$scout_name"
        ( cd "$WD_child" && exec "$WD_child/$scout_name" ) &> "$WD_child/scout.log" &
      else
        logMsg "Error: Can't prepare the working directory of the child: $WD_child"
      fi

      NUM_WNs=$((NUM_WNs + 1))
    done
  fi
fi

# The user environment script may have changed DDS_LOCATION.
# If it did, point the variable back to the working directory and warn the user.
[ "$DDS_LOCATION" = "$WD" ] || {
    DDS_LOCATION=$WD
    export DDS_LOCATION
    logMsg "WARNING: DDS_LOCATION envvar has been reset by user. Setting it back to $DDS_LOCATION"
}

# dump the environment the agent is going to be started with
logMsg "---=== Print current environment ===---"
env

# if dds-agent goes down or is crashed, we will try to restart it AGENT_MAX_RESTART_COUNT times
#
# Only the reserved exit code 100 triggers a restart. Every restart is
# counted, and the loop ends once the counter reaches the maximum. An agent
# that ran for more than AGENTCOUNTER_RESET_TIMEOUT minutes before it asked for
# a restart resets the counter.
AGENT_RESTART_COUNT=0
AGENT_MAX_RESTART_COUNT=3
# Reset the agent restart counter after 10 min
AGENTCOUNTER_RESET_TIMEOUT=10
while [ "$AGENT_RESTART_COUNT" -lt "$AGENT_MAX_RESTART_COUNT" ]
do
   logMsg "Attempt to start dds-agent ($((AGENT_RESTART_COUNT + 1)) out of $AGENT_MAX_RESTART_COUNT)"
   start_time=$(date +%s 2>/dev/null)

   # _DDS_AGENT_SLOTS_ is replaced (set) by the dds-prep-worker
   if [ -n "$2" ]; then
      "$dds_agent" start --slots "$2"&
   else
      "$dds_agent" start --slots _DDS_AGENT_SLOTS_&
   fi
   # wait for dds-agent's process
   # The PID is kept in DDSAGENT_PID, where clean_up looks for it.
   DDSAGENT_PID=$!
   wait "$DDSAGENT_PID"
   dds_exit_code=$?
   logMsg "dds-agent is done, exit code: $dds_exit_code"

   # code == 100 --- Reserved. We restart if agent returns this code
   if [ "$dds_exit_code" -ne 100 ]; then
      break
   fi

   # An empty timestamp (date failed) counts as 0 in the arithmetic below,
   # which simply means that the restart is counted.
   stop_time=$(date +%s 2>/dev/null)
   duration_m=$(( (stop_time - start_time) / 60 ))
   if [ "$duration_m" -gt "$AGENTCOUNTER_RESET_TIMEOUT" ]; then
      logMsg "There were more than $AGENTCOUNTER_RESET_TIMEOUT min. since the last restart. Resetting the agent restart counter..."
      AGENT_RESTART_COUNT=0
   else
      AGENT_RESTART_COUNT=$((AGENT_RESTART_COUNT + 1))
   fi
done

if [ "$AGENT_RESTART_COUNT" -ge "$AGENT_MAX_RESTART_COUNT" ]; then
   logMsg "dds-agent has been restarted $AGENT_MAX_RESTART_COUNT times in a row. Giving up."
fi
logMsg "--- DONE ---"

# Exit
clean_up 0
